library(rmarkdown)
library(httr)
library(data.table)
library(dplyr)
library(lubridate)
library(knitr)
library(kableExtra)
library(ggplot2)
library(stringr)
library(magrittr)
library(highcharter)
library(forecast)
library(xlsx)
library(fastDummies)
library(zoo)
library(prophet)
library(tidyr)
library(psych)
library(rsample)
library(caret)
library(MLmetrics)
library(modelr)
fread("https://data.geo.admin.ch/ch.meteoschweiz.klima/nbcn-tageswerte/nbcn-daily_BAS_previous.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
mutate(
timestamp = as.POSIXct(
paste0(
substr(date, 1, 4),
"-",
substr(date, 5, 6),
"-",
substr(date, 7, 8),
" 00:00:00"),
, format="%Y-%m-%d %H:%M:%S"
)
) %>%
mutate_at(c("gre000d0", "hto000d0", "nto000d0", "prestad0", "rre150d0", "sre000d0", "tre200d0", "tre200dn", "tre200dx", "ure200d0"), as.numeric) %>%
bind_rows(
# fread("test2.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
fread("https://data.geo.admin.ch/ch.meteoschweiz.klima/nbcn-tageswerte/nbcn-daily_BAS_current.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
mutate(
timestamp = as.POSIXct(
paste0(
substr(date, 1, 4),
"-",
substr(date, 5, 6),
"-",
substr(date, 7, 8),
" 00:00:00"),
, format="%Y-%m-%d %H:%M:%S", tz="Europe/Berlin"
)
) %>%
mutate_at(c("gre000d0", "hto000d0", "nto000d0", "prestad0", "rre150d0", "sre000d0", "tre200d0", "tre200dn", "tre200dx", "ure200d0"), as.numeric)
) %>%
relocate(timestamp) %>%
select(-c(date, `station/location`)) %>%
mutate(
year = as.numeric(substr(timestamp, 1, 4)),
month = as.numeric(substr(timestamp, 6, 7)),
day = as.numeric(substr(timestamp, 9, 10))
) %>%
data.frame() %>%
assign("meteo", ., inherits = TRUE)
meteo %>%
mutate(Date=as.Date(timestamp)) %>%
select(Date, max_temp = tre200dx, min_temp = tre200dn, mean_temp = tre200d0) %>%
filter(year(Date)>1970) -> temp
highchart(type = "stock") %>%
hc_add_series(type = "line",
data = temp,
hcaes(Date, mean_temp),
color = "#B00000",
tooltip = list(pointFormat = "Tagesmittel: {point.mean_temp:.1f} C"),
size = 0.15) %>%
hc_add_series(type = "line",
data = temp,
hcaes(Date, max_temp),
color = "#661200",
tooltip = list(pointFormat = "Tagesmaximum: {point.max_temp:.1f} C"),
size = 0.15) %>%
hc_add_series(type = "line",
data = temp,
hcaes(Date, min_temp),
color = "#DC440E",
tooltip = list(pointFormat = "Tagesminimum: {point.min_temp:.1f} C"),
size = 0.15) %>%
hc_rangeSelector(verticalAlign = "bottom",
selected = 1) %>%
hc_xAxis(title = list(text = "")) %>%
hc_yAxis(title = list(text = ""), opposite = F)
Figure 1.1: Lufttemperatur seit 1864.
httr::GET("https://data.bs.ch/explore/dataset/100233/download/?format=csv&timezone=Europe%2FBerlin") %>%
content(., "text") %>%
fread(sep=";", colClasses = c(timestamp_interval_start_text = "character")) %>%
select(timestamp = timestamp_interval_start_text, netzlast_kwh = stromverbrauch_kwh, grundversorgte_kunden_kwh, freie_kunden_kwh) %>%
arrange(timestamp) %>%
mutate(
year = as.numeric(substr(timestamp, 1, 4)),
month = as.numeric(substr(timestamp, 6, 7)),
day = as.numeric(substr(timestamp, 9, 10))
) %>%
group_by(year, month, day) %>%
filter(year > 2011) %>%
summarise(netzlast_kwh = sum(netzlast_kwh, na.rm = T),
grundversorgte_kunden_kwh = sum(grundversorgte_kunden_kwh, na.rm = T),
freie_kunden_kwh = sum(freie_kunden_kwh, na.rm = T)) %>%
ungroup() %>%
mutate(timestamp = as.POSIXct(
paste0(year, "-", month, "-", day, " 00:00:00"), format="%Y-%m-%d %H:%M:%S", tz="Europe/Berlin")
) %>%
assign("strom_daily", ., inherits = TRUE)
highchart(type = "stock") %>%
hc_add_series(strom_daily %>%
mutate(Date=as.Date(timestamp),
netzlast_kwh = netzlast_kwh/1000000),
type = "line",
hcaes(Date, netzlast_kwh),
color = "#923F8D",
tooltip = list(pointFormat = "Stromverbrauch: {point.netzlast_kwh:.2f} GWh"),
size = 0.15) %>%
hc_rangeSelector(verticalAlign = "bottom",
selected = 1) %>%
hc_xAxis(title = list(text = "")) %>%
hc_yAxis(title = list(text = ""), opposite = F)
Figure 1.2: Täglicher Stromverbrauch seit 2012.
httr::GET("https://data.bs.ch/explore/dataset/100074/download/?format=csv&timezone=Europe%2FBerlin") %>%
content(., "text") %>%
fread(sep=";") %>%
select(tag_datum, name, code, kategorie_name) %>%
filter(name != "Fasnachtsmontag", name != "Fasnachtsmittwoch", name != "Dies Academicus") %>%
filter(!(tag_datum == "2008-05-01 00:00:00" & name == "Tag der Arbeit")) %>% # Tag der Arbeit doubles with Auffahrt
filter(kategorie_name %in% c("Feiertag", "Ferien") | code == "herbstm") %>%
filter(name != "Semesterferien") %>%
assign("rd_veranst", ., inherits = TRUE)
rd_veranst %>%
data.frame() %>%
mutate(Herbstmesse = if_else(code == "herbstm", "Herbstmesse", "")) %>%
select(tag_datum, Herbstmesse) %>%
filter(Herbstmesse != "") %>%
full_join(
rd_veranst %>%
mutate(Feiertage = if_else(kategorie_name == "Feiertag", name, "")) %>%
select(tag_datum, Feiertage) %>%
filter(Feiertage != ""),
by = "tag_datum"
) %>%
full_join(
rd_veranst %>%
mutate(Ferien = if_else(kategorie_name == "Ferien", name, "")) %>%
select(tag_datum, Ferien) %>%
filter(Ferien != ""),
by = "tag_datum"
) %>%
mutate(timestamp = as.POSIXct(tag_datum, tz="Europe/Berlin")) %>%
filter(year(timestamp) > 2011) %>%
select(timestamp, Herbstmesse, Feiertage, Ferien) %>%
mutate(
year = lubridate::year(timestamp),
month = lubridate::month(lubridate::floor_date(timestamp, "month")),
day = lubridate::day(lubridate::floor_date(timestamp, "day"))
) %>%
data.frame() %>%
assign("Veranstaltungen", ., inherits = TRUE)
rm(events, rd_veranst)
meteo %>%
full_join(Veranstaltungen %>%
select(-timestamp), by = c("year" = "year", "month" = "month", "day" = "day")) %>%
full_join(strom_daily %>%
select(-timestamp), by = c("year" = "year", "month" = "month", "day" = "day")) %>%
mutate(weekday = lubridate::wday(timestamp, label = TRUE, abbr = TRUE, locale="German_Germany"),
daytype = if_else(weekday %in% c("So", "Sa"), "Wochenende", "Werktage")) %>%
mutate(Covid19_Lockdown = if_else(timestamp < as.POSIXct("2020-03-15", format="%Y-%m-%d"), 0,
if_else(timestamp > as.POSIXct("2020-05-10", format="%Y-%m-%d"), 0, 1))
) %>%
relocate(timestamp, netzlast_kwh, grundversorgte_kunden_kwh, freie_kunden_kwh) %>%
filter(year(timestamp) > 2011) %>%
mutate(HGT = if_else(tre200d0 <= 12, 20-tre200d0, 0)) %>%
mutate(Herbstmesse = if_else(is.na(Herbstmesse), "No", Herbstmesse),
Feiertage = if_else(is.na(Feiertage), "No", Feiertage),
Ferien = if_else(is.na(Ferien), "No", Ferien),
HGT_sq = HGT^2) %>%
drop_na() %>%
mutate(time = as.Date(timestamp, tz = 'Europe/Berlin')) %>%
select(-timestamp) %>%
relocate(time) %>%
assign("Data", ., inherits = TRUE)
Data %>%
mutate(time = as.Date(time),
Feiertage_dummy = if_else(Feiertage == "No", 0, 1),
Ferien_dummy = if_else(Ferien == "No", 0, 1),
Herbstmesse_dummy = if_else(Herbstmesse == "No", 0, 1),
Wochenende_dummy = if_else(daytype == "Werktage", 0, 1),
Mo_dummy = if_else(weekday == "Mo", 1, 0),
Di_dummy = if_else(weekday == "Di", 1, 0),
Mi_dummy = if_else(weekday == "Mi", 1, 0),
Do_dummy = if_else(weekday == "Do", 1, 0),
Fr_dummy = if_else(weekday == "Fr", 1, 0),
Sa_dummy = if_else(weekday == "Sa", 1, 0),
So_dummy = if_else(weekday == "So", 1, 0),
time_num = as.numeric(time),
Neujahrstag_dummy = if_else(Feiertage == "Neujahrstag", 1, 0),
Karfreitag_dummy = if_else(Feiertage == "Karfreitag", 1, 0),
Ostersonntag_dummy = if_else(Feiertage == "Ostersonntag", 1, 0),
Ostermontag_dummy = if_else(Feiertage == "Ostermontag", 1, 0),
Tag_der_Arbeit_dummy = if_else(Feiertage == "Tag der Arbeit", 1, 0),
Auffahrt_dummy = if_else(Feiertage == "Auffahrt", 1, 0),
Pfingssonntag_dummy = if_else(Feiertage == "Pfingssonntag", 1, 0),
Pfingstmontag_dummy = if_else(Feiertage == "Pfingstmontag", 1, 0),
Bundesfeiertag_dummy = if_else(Feiertage == "Bundesfeiertag", 1, 0),
Heiligabend_dummy = if_else(Feiertage == "Heiligabend", 1, 0),
Weihnachten_dummy = if_else(Feiertage == "Weihnachten", 1, 0),
Stephanstag_dummy = if_else(Feiertage == "Stephanstag", 1, 0),
Silvester_dummy = if_else(Feiertage == "Silvester", 1, 0),
Weihnachtsferien_dummy = if_else(Feiertage == "Weihnachtsferien", 1, 0),
Fasnachtsferien_dummy = if_else(Feiertage == "Fasnachtsferien (Sportferien)", 1, 0),
Osterferien_dummy = if_else(Feiertage == "Osterferien (Frühlingsferien)", 1, 0),
Sommerferien_dummy = if_else(Feiertage == "Sommerferien", 1, 0),
Herbstferien_dummy = if_else(Feiertage == "Herbstferien", 1, 0),
tre200d0_sq = tre200d0^2
) %>%
dplyr::select(-c(grundversorgte_kunden_kwh, freie_kunden_kwh, year, month, day, Herbstmesse, Feiertage, Ferien, daytype, weekday)) %>%
mutate(
Ettape = case_when(
time < as.Date("2022-07-01") ~ "Train",
time >= as.Date("2022-07-01") & time < as.Date("2023-05-01") ~ "Test",
time >= as.Date("2023-05-01") ~ "Prognose"
)
) %>%
assign("Data_selec", ., inherits = TRUE)
set.seed(1234)
Data_model <- Data_selec %>%
filter(time < as.Date("2023-05-01")) %>%
mutate(time_num = as.numeric(time)) %>%
arrange(time)
# Data_prognose <- Data_selec %>%
# filter(time >= as.Date("2023-05-01"))
tab_split <- initial_split(Data_model, prop = .7)
tab_train <- training(tab_split)
tab_test <- testing(tab_split)
model_lm <- lm(netzlast_kwh ~ time_num + I(time_num^2) + Di_dummy + Mi_dummy + Do_dummy + Fr_dummy + Sa_dummy + So_dummy + gre000d0 + hto000d0 +
# nto000d0 + prestad0 +
rre150d0 + sre000d0 + tre200d0 + I(tre200d0^2) +
# tre200dn + tre200dx +
ure200d0 + Covid19_Lockdown + HGT + I(HGT^2) + Neujahrstag_dummy + Karfreitag_dummy +
# Ostersonntag_dummy +
Ostermontag_dummy + Tag_der_Arbeit_dummy + Auffahrt_dummy +
# Pfingssonntag_dummy +
Pfingstmontag_dummy + Bundesfeiertag_dummy + Heiligabend_dummy + Weihnachten_dummy + Stephanstag_dummy + Silvester_dummy + Ferien_dummy +
time_num * Wochenende_dummy +
Neujahrstag_dummy * Wochenende_dummy +
# Karfreitag_dummy * Wochenende_dummy +
Tag_der_Arbeit_dummy * Wochenende_dummy +
# Auffahrt_dummy * Wochenende_dummy +
Bundesfeiertag_dummy * Wochenende_dummy +
Heiligabend_dummy * Wochenende_dummy +
Stephanstag_dummy * Wochenende_dummy, data = tab_train)
Data_selec %>%
bind_cols(
predict(model_lm, Data_selec, interval = "prediction")
) %>%
select(time, fit, lwr, upr, netzlast_kwh) %>%
mutate(
Ettape = case_when(
time < as.Date("2022-07-01") ~ "Train",
time >= as.Date("2022-07-01") & time < as.Date("2023-05-01") ~ "Test",
time >= as.Date("2023-05-01") ~ "Prognose"
)
) %>%
mutate(resid = netzlast_kwh - fit) %>%
assign("Pred_lm", ., inherits = TRUE)
write.csv2(Pred_lm, "100245_Strom_Wetter.csv", row.names=F, na = "")
Pred_lm %>%
mutate(time_num = as.numeric(time)) %>%
assign("Pred_lm", ., inherits = TRUE)
| Ettape | R2 | RMSE | MAE |
|---|---|---|---|
| Train | 0.9503770 | 105297.29 | 79198.61 |
| Test | 0.9063178 | 94903.66 | 69022.56 |
| Prognose | 0.9414487 | 85902.75 | 66292.40 |
Pred_lm %>%
filter(time >= as.Date("2023-07-01")) %>%
mutate(netzlast_kwh = netzlast_kwh/1000000,
fit = fit/1000000,
lwr = lwr/1000000,
upr = upr/1000000) -> recent
hchart(recent, "line", hcaes(time, netzlast_kwh), color = "#008AC3",
tooltip = list(pointFormat = "Effektiver Stromverbrauch: {point.netzlast_kwh:.2f} GWh",
shared = TRUE),
zIndex = 1) %>%
hc_add_series(recent, "line", hcaes(time, fit), color = "#B375AB",
tooltip = list(pointFormat = "Erwarteter Stromverbrauch: {point.fit:.2f} GWh",
shared = TRUE),
zIndex = 2) %>%
hc_add_series(recent, type = "arearange",
hcaes(x = time, low = lwr, high = upr),
zIndex = 0,
color = "#E7CEE2",
tooltip = list(pointFormat = "95% Konfidenzintervall: {point.lwr:.2f} - {point.upr:.2f} GWh"), shared = TRUE
) %>%
hc_xAxis(title = list(text = "")) %>%
hc_yAxis(title = list(text = "")) %>%
hc_plotOptions(series = list(marker = list(enabled = FALSE)))
Figure 2.1: Effektiver und erwarteter täglicher Stromverbrauch.
Hinweis: Je nach Kombination von Betriebssystemen und Versionen von RStudio, R und dem Paket prophet können die Ergebnisse leicht von den publizierten Resultaten abweichen. Grund dafür sind Unterschiede am Zufalls-Generator für die Prognose des Modells. Die angewendete Konfiguration lautet:
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=German_Switzerland.utf8 LC_CTYPE=German_Switzerland.utf8
## [3] LC_MONETARY=German_Switzerland.utf8 LC_NUMERIC=C
## [5] LC_TIME=German_Switzerland.utf8
##
## attached base packages:
## [1] stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] modelr_0.1.11 MLmetrics_1.1.1 caret_6.0-94 lattice_0.21-8
## [5] rsample_1.2.0 psych_2.3.6 tidyr_1.3.0 prophet_1.0
## [9] rlang_1.1.1 Rcpp_1.0.11 zoo_1.8-11 fastDummies_1.7.3
## [13] xlsx_0.6.5 forecast_8.21.1 highcharter_0.9.4 magrittr_2.0.3
## [17] stringr_1.5.0 ggplot2_3.4.4 kableExtra_1.3.4 knitr_1.43
## [21] lubridate_1.9.2 dplyr_1.1.3 data.table_1.14.8 httr_1.4.7
## [25] rmarkdown_2.25
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-163 xts_0.12.2 webshot_0.5.5
## [4] tools_4.2.2 backports_1.4.1 bslib_0.5.1
## [7] utf8_1.2.3 R6_2.5.1 rpart_4.1.19
## [10] colorspace_2.1-0 nnet_7.3-19 withr_2.5.0
## [13] tidyselect_1.2.0 mnormt_2.1.1 curl_5.0.2
## [16] compiler_4.2.2 cli_3.6.1 rvest_1.0.3
## [19] xml2_1.3.3 bookdown_0.35 sass_0.4.7
## [22] tseries_0.10-54 scales_1.2.1 lmtest_0.9-40
## [25] fracdiff_1.5-2 quadprog_1.5-8 systemfonts_1.0.4
## [28] digest_0.6.33 svglite_2.1.1 pkgconfig_2.0.3
## [31] htmltools_0.5.6 parallelly_1.36.0 highr_0.10
## [34] fastmap_1.1.1 htmlwidgets_1.6.2 TTR_0.24.3
## [37] rstudioapi_0.15.0 quantmod_0.4.25 jquerylib_0.1.4
## [40] generics_0.1.3 jsonlite_1.8.7 ModelMetrics_1.2.2.2
## [43] rlist_0.4.6.2 Matrix_1.6-1 munsell_0.5.0
## [46] fansi_1.0.4 lifecycle_1.0.3 furrr_0.3.1
## [49] pROC_1.18.4 stringi_1.7.12 yaml_2.3.6
## [52] MASS_7.3-60 recipes_1.0.8 plyr_1.8.8
## [55] grid_4.2.2 parallel_4.2.2 listenv_0.9.0
## [58] splines_4.2.2 xlsxjars_0.6.1 pillar_1.9.0
## [61] igraph_1.5.1 stats4_4.2.2 reshape2_1.4.4
## [64] future.apply_1.11.0 codetools_0.2-19 urca_1.3-3
## [67] glue_1.6.2 evaluate_0.21 renv_1.0.3
## [70] RcppParallel_5.1.7 foreach_1.5.2 vctrs_0.6.3
## [73] gtable_0.3.4 purrr_1.0.2 future_1.33.0
## [76] assertthat_0.2.1 cachem_1.0.8 gower_1.0.1
## [79] xfun_0.39 prodlim_2023.08.28 broom_1.0.5
## [82] survival_3.5-7 class_7.3-22 viridisLite_0.4.2
## [85] timeDate_4022.108 tibble_3.2.1 iterators_1.0.14
## [88] rJava_1.0-6 hardhat_1.3.0 lava_1.7.2.1
## [91] timechange_0.2.0 globals_0.16.2 ellipsis_0.3.2
## [94] ipred_0.9-14
renv::snapshot()
## - The lockfile is already up to date.